{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33e36462-0981-4c37-bbe7-bc356ab39bde",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install -qU \"elasticsearch<9\" sentence-transformers==2.7.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d92d41fb-975f-4f45-8e8a-13c1c11a55b5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elastic Cloud ID:  ········\n",
      "Elastic Api Key:  ········\n"
     ]
    }
   ],
   "source": [
    "# get the Elasticsearch client\n",
    "from elasticsearch import Elasticsearch, exceptions\n",
    "from getpass import getpass\n",
    "import time\n",
    "\n",
    "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
    "\n",
    "client = Elasticsearch(\n",
    "    cloud_id=ELASTIC_CLOUD_ID,\n",
    "    api_key=ELASTIC_API_KEY,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00acf8a3-fad8-4be8-b8a8-17c37fcde1cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# delete model if already downloaded and deployed\n",
    "try:\n",
    "    client.ml.delete_trained_model(model_id=\".elser_model_2\", force=True)\n",
    "    print(\"Model deleted successfully, We will proceed with creating one\")\n",
    "except exceptions.NotFoundError:\n",
    "    print(\"Model doesn't exist, but We will proceed with creating one\")\n",
    "\n",
    "# Creates the ELSER model configuration. Automatically downloads the model if it doesn't exist.\n",
    "client.ml.put_trained_model(\n",
    "    model_id=\".elser_model_2\", input={\"field_names\": [\"text_field\"]}\n",
    ")\n",
    "\n",
    "while True:\n",
    "    status = client.ml.get_trained_models(\n",
    "        model_id=\".elser_model_2\", include=\"definition_status\"\n",
    "    )\n",
    "\n",
    "    if status[\"trained_model_configs\"][0][\"fully_defined\"]:\n",
    "        break\n",
    "    time.sleep(5)\n",
    "\n",
    "# Start trained model deployment if not already deployed\n",
    "client.ml.start_trained_model_deployment(\n",
    "    model_id=\".elser_model_2\", number_of_allocations=1, wait_for=\"starting\"\n",
    ")\n",
    "\n",
    "while True:\n",
    "    status = client.ml.get_trained_models_stats(\n",
    "        model_id=\".elser_model_2\",\n",
    "    )\n",
    "    if status[\"trained_model_stats\"][0][\"deployment_stats\"][\"state\"] == \"started\":\n",
    "        print(\"ELSER Model has been successfully deployed.\")\n",
    "        break\n",
    "    else:\n",
    "        print(\"ELSER Model is currently being deployed.\")\n",
    "    time.sleep(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1ad5f6d6-cc07-4f62-a7ab-25f635e4f111",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'errors': False, 'took': 69, 'items': [{'index': {'_index': 'books', '_id': 'd8CPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'eMCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'ecCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'esCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'e8CPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 4, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'fMCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 5, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'fcCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 6, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'fsCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 7, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'f8CPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 8, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'gMCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 9, '_primary_term': 1, 'status': 201}}]})"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "from urllib.request import urlopen\n",
    "from sentence_transformers import SentenceTransformer\n",
    "\n",
    "# these tests need book_index to exist ahead of time\n",
    "client.indices.delete(index=\"books\", ignore_unavailable=True)\n",
    "\n",
    "mappings = {\n",
    "    \"properties\": {\n",
    "        \"title_vector\": {\n",
    "            \"type\": \"dense_vector\",\n",
    "            \"dims\": 384,\n",
    "            \"index\": \"true\",\n",
    "            \"similarity\": \"cosine\",\n",
    "        }\n",
    "    }\n",
    "}\n",
    "client.indices.create(index=\"books\", mappings=mappings)\n",
    "\n",
    "url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json\"\n",
    "response = urlopen(url)\n",
    "books = json.loads(response.read())\n",
    "\n",
    "model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
    "operations = []\n",
    "for book in books:\n",
    "    operations.append({\"index\": {\"_index\": \"books\"}})\n",
    "    # Transforming the title into an embedding using the model\n",
    "    book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n",
    "    operations.append(book)\n",
    "client.bulk(index=\"books\", operations=operations, refresh=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65b92c67-b94b-4eda-923d-ed81706c10f9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
